import pandas as pd
import numpy as np
df = pd.read_csv("county_annual.csv")
df_annualnull = df[df.isnull().any(axis=1) & (df['BUYER_STATE'] != "PR")]
print(df_annualnull)
Unnamed: 0 BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT \
10071 10072 GUAM GU 2006 319 265348.0
10072 10073 GUAM GU 2007 330 275600.0
10073 10074 GUAM GU 2008 313 286900.0
10074 10075 GUAM GU 2009 390 355300.0
10075 10076 GUAM GU 2010 510 413800.0
... ... ... ... ... ... ...
27751 27752 NaN MA 2006 247 114900.0
27752 27753 NaN NV 2006 380 173600.0
27753 27754 NaN NV 2007 447 200600.0
27754 27755 NaN NV 2008 5 2200.0
27755 27756 NaN OH 2006 23 5100.0
countyfips
10071 NaN
10072 NaN
10073 NaN
10074 NaN
10075 NaN
... ...
27751 NaN
27752 NaN
27753 NaN
27754 NaN
27755 NaN
[74 rows x 7 columns]
df_county = df.dropna(subset=['BUYER_COUNTY'])
df_county
| Unnamed: 0 | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | ABBEVILLE | SC | 2006 | 877 | 363620.0 | 45001.0 |
| 1 | 2 | ABBEVILLE | SC | 2007 | 908 | 402940.0 | 45001.0 |
| 2 | 3 | ABBEVILLE | SC | 2008 | 871 | 424590.0 | 45001.0 |
| 3 | 4 | ABBEVILLE | SC | 2009 | 930 | 467230.0 | 45001.0 |
| 4 | 5 | ABBEVILLE | SC | 2010 | 1197 | 539280.0 | 45001.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 27736 | 27737 | ZAVALA | TX | 2010 | 248 | 200100.0 | 48507.0 |
| 27737 | 27738 | ZAVALA | TX | 2011 | 406 | 244800.0 | 48507.0 |
| 27738 | 27739 | ZAVALA | TX | 2012 | 473 | 263700.0 | 48507.0 |
| 27739 | 27740 | ZAVALA | TX | 2013 | 399 | 186700.0 | 48507.0 |
| 27740 | 27741 | ZAVALA | TX | 2014 | 162 | 148930.0 | 48507.0 |
27741 rows × 7 columns
## Question 4
df_mean = df_county[['year','DOSAGE_UNIT']]
df_meandosage = df_mean.groupby(['year']).mean().reset_index()
print(df_meandosage)
year DOSAGE_UNIT 0 2006 2.654669e+06 1 2007 2.995906e+06 2 2008 3.254271e+06 3 2009 3.526038e+06 4 2010 3.783656e+06 5 2011 4.035583e+06 6 2012 3.993064e+06 7 2013 3.861752e+06 8 2014 3.768738e+06
import plotly.express as px
fig = px.scatter(df_meandosage, x="year", y="DOSAGE_UNIT")
fig.show()